[TTI][RISCV] Implement cost of some intrinsics with LMUL #117874

LiqinWeng · 2024-11-27T12:25:54Z

Intrinsics include: sadd_sat/ssub_sat/uadd_sat/usub_sat/fabs/fsqrt/cttz/ctlz/ctpop

llvmbot · 2024-11-27T12:26:28Z

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-analysis

Author: LiqinWeng (LiqinWeng)

Changes

Intrinsics include: sadd_sat/ssub_sat/uadd_sat/usub_sat/fabs/fsqrt/cttz/ctlz/ctpop

Patch is 73.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117874.diff

5 Files Affected:

(modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+64-6)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll (+34-22)
(modified) llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll (+17-17)
(modified) llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll (+54-54)
(modified) llvm/test/Analysis/CostModel/RISCV/int-sat-math.ll (+60-60)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 8f0ef69258b165..e2cd71fb5a9f40 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1013,20 +1013,78 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat:
   case Intrinsic::uadd_sat:
-  case Intrinsic::usub_sat:
-  case Intrinsic::fabs:
+  case Intrinsic::usub_sat: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (ST->hasVInstructions() && LT.second.isVector()) {
+      unsigned Op;
+      switch (ICA.getID()) {
+      case Intrinsic::sadd_sat:
+        Op = RISCV::VSADD_VV;
+        break;
+      case Intrinsic::ssub_sat:
+        Op = RISCV::VSSUBU_VV;
+        break;
+      case Intrinsic::uadd_sat:
+        Op = RISCV::VSADDU_VV;
+        break;
+      case Intrinsic::usub_sat:
+        Op = RISCV::VSSUBU_VV;
+        break;
+      }
+      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
+    }
+    break;
+  }
+  case Intrinsic::fabs: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    // FIXME: not get the correct cost about the llvm.sqrt.vxbf16
+    // LT.second is promote llvm::MVT::f32
+    if (ST->hasVInstructions() && LT.second.isVector()) {
+      // lui a0, 8
+      // addi a0, a0, -1
+      // vsetvli a1, zero, e16, m1, ta, ma
+      // vand.vx v8, v8, a0
+      if (LT.second.getVectorElementType() == MVT::bf16 ||
+          (LT.second.getVectorElementType() == MVT::f16 &&
+           !ST->hasVInstructionsF16()))
+        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
+                                                  CostKind) +
+               2;
+      else
+        return LT.first *
+               getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
+    }
+    break;
+  }
   case Intrinsic::sqrt: {
     auto LT = getTypeLegalizationCost(RetTy);
-    if (ST->hasVInstructions() && LT.second.isVector())
-      return LT.first;
+    // FIXME: not get the correct cost about the llvm.sqrt.vxbf16
+    // LT.second is promote llvm::MVT::f32
+    if (ST->hasVInstructions() && LT.second.isVector()) {
+      SmallVector<unsigned, 3> Opcodes;
+      if (LT.second.getVectorElementType() == MVT::bf16)
+        Opcodes = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFSQRT_V,
+                   RISCV::VFNCVTBF16_F_F_W};
+      else if (LT.second.getVectorElementType() == MVT::f16 &&
+               !ST->hasVInstructionsF16())
+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+      else
+        Opcodes = {RISCV::VFSQRT_V};
+      return LT.first *
+             getRISCVInstructionCost(RISCV::VFSQRT_V, LT.second, CostKind);
+    }
     break;
   }
   case Intrinsic::cttz:
   case Intrinsic::ctlz:
   case Intrinsic::ctpop: {
     auto LT = getTypeLegalizationCost(RetTy);
-    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
-      return LT.first;
+    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
+      unsigned Op = (Intrinsic::cttz)   ? RISCV::VCTZ_V
+                    : (Intrinsic::ctlz) ? RISCV::VCLZ_V
+                                        : RISCV::VCPOP_V;
+      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
+    }
     break;
   }
   case Intrinsic::abs: {
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
index 0b2c8da4438da2..6259c31a08bc1f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -7,22 +7,22 @@ define void @fabs() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call float @llvm.fabs.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x float> @llvm.fabs.nxv1f32(<vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 8 x float> @llvm.fabs.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %10 = call <vscale x 16 x float> @llvm.fabs.nxv16f32(<vscale x 16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call double @llvm.fabs.f64(double undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %15 = call <16 x double> @llvm.fabs.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x double> @llvm.fabs.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 4 x double> @llvm.fabs.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 8 x double> @llvm.fabs.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call float @llvm.fabs.f32(float undef)
@@ -48,17 +48,29 @@ define void @fabs() {
 }
 
 define void @fabs_f16() {
-; CHECK-LABEL: 'fabs_f16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ZVFH-LABEL: 'fabs_f16'
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; ZVFHMIN-LABEL: 'fabs_f16'
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call half @llvm.fabs.f16(half undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %2 = call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %3 = call <4 x half> @llvm.fabs.v4f16(<4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %4 = call <8 x half> @llvm.fabs.v8f16(<8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %5 = call <16 x half> @llvm.fabs.v16f16(<16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %6 = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %7 = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %9 = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> undef)
+; ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call half @llvm.fabs.f16(half undef)
   call <2 x half> @llvm.fabs.v2f16(<2 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
index be9c19dc59a852..446627f6bf3c0e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-sqrt-pow.ll
@@ -8,30 +8,30 @@ define void @sqrt() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.sqrt.v2bf16(<2 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.sqrt.v4bf16(<4 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x bfloat> @llvm.sqrt.v16bf16(<16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.sqrt.nxv2bf16(<vscale x 2 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.sqrt.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.sqrt.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.sqrt.nxv16bf16(<vscale x 16 x bfloat> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call float @llvm.sqrt.f32(float undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %13 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %14 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <vscale x 1 x float> @llvm.sqrt.nxv1f32(<vscale x 1 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %17 = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %18 = call <vscale x 8 x float> @llvm.sqrt.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %19 = call <vscale x 16 x float> @llvm.sqrt.nxv16f32(<vscale x 16 x float> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call double @llvm.sqrt.f64(double undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %22 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %23 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %24 = call <16 x double> @llvm.sqrt.v16f64(<16 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <vscale x 1 x double> @llvm.sqrt.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %26 = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %27 = call <vscale x 4 x double> @llvm.sqrt.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %28 = call <vscale x 8 x double> @llvm.sqrt.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   call bfloat @llvm.sqrt.bf16(bfloat undef)
@@ -71,11 +71,11 @@ define void @sqrt_f16() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x half> @llvm.sqrt.v2f16(<2 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x half> @llvm.sqrt.v4f16(<4 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x half> @llvm.sqrt.v8f16(<8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x half> @llvm.sqrt.v16f16(<16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %8 = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = call <vscale x 16 x half> @llvm.sqrt.nxv16f16(<vscale x 16 x half> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost...
[truncated]

lukel97 · 2024-11-28T12:12:05Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

-    if (ST->hasVInstructions() && LT.second.isVector())
-      return LT.first;
+    // FIXME: not get the correct cost about the llvm.sqrt.vxbf16
+    // LT.second is promote llvm::MVT::f32


Not for this PR, but we probably want to handle the zvfhmin/zvfbfmin promotion for all intrinsics. We could pull this out of the switch statement later.

I will create a new PR to deal with f16 and bf16.

lukel97 · 2024-11-28T12:13:29Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+    // FIXME: not get the correct cost about the llvm.sqrt.vxbf16
+    // LT.second is promote llvm::MVT::f32


Is this comment still needed? The costs in the tests look correct

lukel97 · 2024-11-28T12:13:39Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+               !ST->hasVInstructionsF16())
+        Opcodes = {RISCV::VFWCVT_F_F_V, RISCV::VFSQRT_V, RISCV::VFNCVT_F_F_W};
+      else
+        Opcodes = {RISCV::VFSQRT_V};


Opcodes isn't used?

lukel97 · 2024-11-30T10:50:58Z

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfhmin,+zvfhbmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN


Should this be +zvfbfmin?

lukel97 · 2024-11-30T10:51:21Z

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    auto LT = getTypeLegalizationCost(RetTy);
-    if (ST->hasVInstructions() && LT.second.isVector())
-      return LT.first;
+    // TODO: add f16/bf16, bf16 with zvfhbmin && f16 with zvfhmin


Suggested change

// TODO: add f16/bf16, bf16 with zvfhbmin && f16 with zvfhmin

// TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin

Sorry, fixed.

lukel97 · 2024-12-01T09:01:08Z

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

@@ -1,28 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH


Can you add zvfbfmin to the ZVFH run line too? I think that matches the other tests with bfloat

lukel97 · 2024-12-02T09:27:26Z

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

+  call <2 x bfloat> @llvm.fabs.v2f16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.fabs.v4f16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.fabs.v8f16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.fabs.v16f16(<16 x bfloat> undef)


This should be @llvm.fabs.v2bf16?

Sorry, fixed.

lukel97 · 2024-12-02T09:29:40Z

llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll

 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  call half @llvm.fabs.f16(half undef)
+  call <2 x bfloat> @llvm.fabs.v2f16(<2 x bfloat> undef)


Can you move the bf16 lines into the main @fabs() test? The f16 tests were split out just to show the difference between zvfh and zvfhmin IIRC, which bf16 won't need

lukel97

LGTM, thanks

[TTI][RISCV] Implement cost of some intrinsics with LMUL

48ce8de

Intrinsics include: sadd_sat/ssub_sat/uadd_sat/usub_sat/fabs/fsqrt/cttz/ctlz/ctpop

LiqinWeng requested review from lukel97 and ElvisWang123 November 27, 2024 12:26

llvmbot added backend:RISC-V llvm:analysis labels Nov 27, 2024

lukel97 reviewed Nov 28, 2024

View reviewed changes

remove the fp16 && bf16 implementation

daf8269

lukel97 reviewed Nov 30, 2024

View reviewed changes

fix the comments

d3e0c4a

lukel97 reviewed Dec 1, 2024

View reviewed changes

address the comment

20d9d79

lukel97 reviewed Dec 2, 2024

View reviewed changes

fix the comment

30ba0c7

lukel97 approved these changes Dec 2, 2024

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[TTI][RISCV] Implement cost of some intrinsics with LMUL #117874

[TTI][RISCV] Implement cost of some intrinsics with LMUL #117874

LiqinWeng commented Nov 27, 2024

llvmbot commented Nov 27, 2024 •

edited

Loading

lukel97 Nov 28, 2024

LiqinWeng Nov 29, 2024

lukel97 Nov 28, 2024

LiqinWeng Nov 29, 2024

lukel97 Nov 28, 2024

LiqinWeng Nov 29, 2024

lukel97 Nov 30, 2024

LiqinWeng Dec 1, 2024

lukel97 Nov 30, 2024

LiqinWeng Dec 1, 2024

lukel97 Dec 1, 2024

LiqinWeng Dec 2, 2024

lukel97 Dec 2, 2024

LiqinWeng Dec 2, 2024

lukel97 Dec 2, 2024

LiqinWeng Dec 2, 2024

lukel97 left a comment

		// FIXME: not get the correct cost about the llvm.sqrt.vxbf16
		// LT.second is promote llvm::MVT::f32

	// TODO: add f16/bf16, bf16 with zvfhbmin && f16 with zvfhmin
	// TODO: add f16/bf16, bf16 with zvfbfmin && f16 with zvfhmin

		@@ -1,28 +1,28 @@
		; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
		; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zfh,+zvfh \| FileCheck %s --check-prefixes=CHECK,ZVFH

[TTI][RISCV] Implement cost of some intrinsics with LMUL #117874

Are you sure you want to change the base?

[TTI][RISCV] Implement cost of some intrinsics with LMUL #117874

Conversation

LiqinWeng commented Nov 27, 2024

llvmbot commented Nov 27, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

lukel97 left a comment

Choose a reason for hiding this comment

llvmbot commented Nov 27, 2024 •

edited

Loading